import numpy as np
import pandas as pd
import pdfplumber
import xlwt

# Location of the source PDF whose tables are extracted below.
path = 'D:/Download/IndiaHistory/List of Companies registered for Year 1981.pdf'

# Counter initialised to zero; not read in the visible part of the script.
i = 0

# Create the output workbook and add a sheet to it.
workbook = xlwt.Workbook()
sheet = workbook.add_sheet('Sheet1')

# Walk every table on every page of the PDF. The first row of each table is
# used as the header, then the first column and the header row are dropped.
# The context manager guarantees the PDF handle is closed even if extraction
# raises part-way through.
with pdfplumber.open(path) as pdf:
    for page in pdf.pages:
        for table in page.extract_tables():
            if not table:
                # No rows at all, so there is no header row to read.
                continue
            df = pd.DataFrame(table)
            # Normalise header cells: lower-case, trimmed, spaces -> underscores.
            # Cells that are not strings (e.g. None for an empty cell) become ''
            # instead of raising AttributeError on .lower().
            df.columns = [
                cell.lower().strip().replace(' ', '_') if isinstance(cell, str) else ''
                for cell in df.iloc[0].tolist()
            ]
            df = df.iloc[:, 1:]
            df = df.drop(index=0)
            print(df.columns.values)